"""Count how often each word occurs in a text file and print the result.

Run as a script, this reads ``Walden.txt`` (UTF-8), prints the words ranked
by frequency as a list of ``(word, count)`` pairs, and then prints the same
data as a dictionary.
"""
import re
from collections import Counter

# A word is a maximal run of letters. ``[^\W\d_]`` means "a word character
# that is neither a digit nor an underscore", i.e. a letter. Matching letters
# directly avoids enumerating punctuation by hand: the previous hand-written
# character class contained ``!-1``, which the regex engine reads as a range,
# and it missed brackets, underscores, dashes and typographic quotes.
WORD_PATTERN = re.compile(r"[^\W\d_]+")


def tokenize(text):
    """Return the lower-cased words of *text* in order of appearance.

    Everything that is not a letter (whitespace, digits, punctuation,
    symbols) acts as a separator and is discarded.

    Args:
        text: The string to split into words.

    Returns:
        A list of lower-case word strings; empty if *text* has no letters.
    """
    return WORD_PATTERN.findall(text.lower())


def word_frequencies(words):
    """Return ``(word, count)`` pairs sorted from most to least frequent.

    Args:
        words: An iterable of hashable items (here: word strings).

    Returns:
        A list of ``(item, count)`` tuples as produced by
        ``Counter.most_common()``.
    """
    return Counter(words).most_common()


def main(path="Walden.txt"):
    """Read the file at *path* and print its word-frequency ranking.

    Args:
        path: Location of the UTF-8 encoded text file to analyse.
    """
    # ``with`` guarantees the file is closed even if reading fails.
    with open(path, encoding="utf-8") as source:
        text = source.read()

    ranked = word_frequencies(tokenize(text))
    print(ranked)
    # The same ranking as a word -> count mapping (insertion order is kept).
    print(dict(ranked))


if __name__ == "__main__":
    main()



